This Rmarkdown will perform some comparisons against the carbon usage stats generate as part of DRAM/GROW to various carbon metrics, namely compound classes. We’ll be (tentatively) using the S19S data published in Garayburu-Caruso et al., 2020. This was generated by aligning the surface water and sediment samples together, so we’ll need to trim the sediment samples out.
A secondary function of this Rmarkdown is to generate summary files based off of the FTICR-MS data so that additional correlations can be performed by others.
Loading in packages, setting options, and such
# options
merge.reps = F
# organizational packages
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
library(ggthemes)
library(ggrepel)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(Rfast)
## Loading required package: Rcpp
## Loading required package: RcppZiggurat
## Loading required package: RcppParallel
##
## Attaching package: 'RcppParallel'
##
## The following object is masked from 'package:Rcpp':
##
## LdFlags
##
##
## Rfast: 2.0.9
## ___ __ __ __ __ __ __ __ __ __ _ _ __ __ __ __ __ __ __ __ __ __ __
## | __ __ __ __ | | __ __ __ __ _/ / \ | __ __ __ __ / /__ __ _ _ __ __\
## | | | | | | / _ \ | | / /
## | | | | | | / / \ \ | | / /
## | | | | | | / / \ \ | | / /
## | |__ __ __ __| | | |__ __ __ __ / / \ \ | |__ __ __ __ _ / /__/\
## | __ __ __ __| | __ __ __ __| / /__ _ __\ \ |_ __ __ __ _ | / ___ /
## | \ | | / _ _ _ _ _ _ \ | | \/ / /
## | |\ \ | | / / \ \ | | / /
## | | \ \ | | / / \ \ | | / /
## | | \ \ | | / / \ \ | | / /
## | | \ \__ __ _ | | / / \ \ _ __ __ __ _| | / /
## |_| \__ __ __\ |_| /_/ \_\ /_ __ __ __ ___| \/ team
##
## Attaching package: 'Rfast'
##
## The following object is masked from 'package:dplyr':
##
## nth
##
## The following objects are masked from 'package:purrr':
##
## is_integer, transpose
# analytical
library(Hmisc)
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
# functions
johnston_set = function(mol){
# Johnston SE et al., 2021 - Biogeochemistry
# https://link.springer.com/article/10.1007/s10533-021-00876-7#Sec2
# empty object
johnston_set = rep(NA, nrow(mol))
# boundaries
johnston_set[which(mol$AI_Mod < 0.5 &
mol$HtoC_ratio < 1.5 &
mol$OtoC_ratio < 0.5)] = "Highly Unsaturated and Phenolic, low O/C"
johnston_set[which(mol$AI_Mod < 0.5 &
mol$HtoC_ratio < 1.5 &
mol$OtoC_ratio >= 0.5)] = "Highly Unsaturated and Phenolic, high O/C"
johnston_set[which(mol$HtoC_ratio >= 1.5, mol$N == 0)] = "Aliphatic"
johnston_set[which(mol$AI_Mod >= 0.67)] = "Condensed Aromatic"
johnston_set[which(mol$AI_Mod > 0.5 & mol$AI_Mod < 0.67)] = "Polyphenolic"
johnston_set[which(mol$HtoC_ratio >= 1.5, mol$N > 0)] = "Peptide-like"
johnston_set[which(mol$OtoC_ratio > 0.9)] = "Sugar-like"
# write out
return(johnston_set)
} # could be case_when, but nervous
roebuck_set = function(mol){
# Roebuck JA et al., 2022 - Geophyical Research Letters
# https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2022GL099535
# modified from Seidel et al., 2015 - Marine Chemistry
# https://www.sciencedirect.com/science/article/pii/S0304420315300049
# empty object
roebuck_set = rep(NA, nrow(mol))
# boundaries
roebuck_set[which(mol$AI_Mod >= 0.67 & mol$C >= 15)] = "Polycyclic Aromatic"
roebuck_set[which(mol$AI_Mod >= 0.67 & mol$C >= 15 &
(mol$N > 0 | mol$S > 0 | mol$P > 0))] = "Polycyclic Aromatic w/ Heteroatoms"
roebuck_set[which(mol$AI_Mod >= 0.67 & mol$C < 15 &
(mol$N > 0 | mol$S > 0 | mol$P > 0))] = "High Aromaticity (<15C)"
roebuck_set[which(mol$AI_Mod >= 0.5 & mol$AI_Mod < 0.67)] = "Highly Aromatic including Polyphenols"
roebuck_set[which(mol$AI_Mod >= 0.5 & mol$AI_Mod < 0.67 &
(mol$N > 0 | mol$S > 0 | mol$P > 0))] = "Highly Aromatic w/ Heteroatoms"
roebuck_set[which(mol$AI_Mod < 0.5 & mol$HtoC_ratio < 1.5)] = "Highly Unsaturated"
roebuck_set[which(mol$AI_Mod < 0.5 & mol$HtoC_ratio < 1.5 &
(mol$N > 0 | mol$S > 0 | mol$P > 0))] = "Highly Unsaturated w/ Heteroatoms"
roebuck_set[which(mol$HtoC_ratio >= 1.5 & mol$HtoC_ratio < 2)] = "Unsaturated Aliphatics"
roebuck_set[which(mol$HtoC_ratio >= 1.5 & mol$HtoC_ratio < 2 &
(mol$N > 0 | mol$S > 0 | mol$P > 0))] = "Unsaturated Aliphatics w/ Heteroatoms (including peptides)"
roebuck_set[which(mol$HtoC_ratio >= 2 & mol$OtoC_ratio < 0.9)] = "Saturated Compounds (including lipids)"
roebuck_set[which(mol$HtoC_ratio >= 2 & mol$OtoC_ratio < 0.9 &
(mol$N > 0 | mol$S > 0 | mol$P > 0))] = "Saturated Compounds w/ Heteroatoms"
roebuck_set[which(mol$HtoC_ratio >= 2 & mol$OtoC_ratio >= 0.9)] = "Saturated Compounds (including carbohydrates)"
roebuck_set[which(mol$mol$HtoC_ratio >= 2 & mol$OtoC_ratio >= 0.9 &
(mol$N > 0 | mol$S > 0 | mol$P > 0))] = "Carbohydrate-like w/ Heteroatoms (including amino sugars)"
# write out
return(roebuck_set)
}
# define boundary set plot function
summary_plot = function(summary, title){
summary %>% gather(Sample, Count, -Class) %>%
group_by(Sample) %>%
mutate(`Relative Count (%)` = (Count/sum(Count))*100) %>%
ungroup() %>%
left_join(carbon.use, by = c("Sample" = "code"),
relationship = "many-to-many") %>%
filter(!is.na(abund)) %>%
mutate(abund = abund*100) %>%
rename(`Relative Expression (%)` = abund) %>%
ggplot(aes(x = `Relative Expression (%)`, y = `Relative Count (%)`))+
geom_point(aes(color = Guild))+
geom_smooth(method = "lm")+
scale_color_manual(values = color)+
# stat_cor(method = "spearman")+
facet_grid(Class~Guild, scales = "free", labeller = label_wrap_gen(width=15))+
ggtitle(title)+
theme_bw()+
theme(legend.position = "none")
}
# define boundary set correlation function
summary_stat = function(summary){
# merge summary to carbon use
unique.class = unique(summary$Class)
unique.guild = unique(carbon.use$Guild)
# empty object
correlations = NULL
# looping through pairwise comparisons
for(i in 1:length(unique.class)){
for(j in 1:length(unique.guild)){
# select current comparisons
curr.class = unique.class[i]
curr.guild = unique.guild[j]
# create temporary object
temp = summary %>% gather(code, Count, -Class) %>%
group_by(code) %>% mutate(Rel_Count = (Count/sum(Count)*100)) %>%
filter(Class == curr.class) %>%
left_join(carbon.use[which(carbon.use$Guild %in% curr.guild),], by = "code") %>%
filter(!is.na(Guild)) %>%
mutate(abund = abund*100)
# run spearman correlation
temp = rcorr(temp$Rel_Count, temp$abund)
# build output
temp = data.frame(Class = curr.class, Guild = curr.guild,
rho = temp$r[1,2], p_value = temp$P[1,2])
# add to correlation object
correlations = rbind(correlations, temp)
}
}
# return correlations
return(correlations)
}
# import lambda functions
source("~/Documents/Code Information/FTICR Scripts/R Functions/getLambda.R")
# define colors
color = c(`chemolithoautotroph` = "#8CADD2",
`heterotroph-aromatic` = "#198035",
`heterotroph-polymer` = "#093614",
`heterotroph-sugar` = "#1F7C37",
`methyl_c1` = "#7D52A7",
`SCFA and alcohol conversions` = "#7A82BA",
`heterotroph-ch4` = "#72006D")
color = color[order(names(color))]
# knitr options
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = "~/Documents/PNNL Analyses/GROW Paper/")
Handles the import of data
# set working directory (redunant)
setwd( "~/Documents/PNNL Analyses/GROW Paper/")
# load in ICR data
if(merge.reps){ # load in unmerged data if merge.data == T
data = read.csv("Processed_S19S_Sed-Wat_8.12_Data.csv", row.names = 1)
mol = read.csv("Processed_S19S_Sed-Wat_8.12_Mol.csv", row.names = 1)
} else { # load in merged data
data = read.csv("Merged_S19S_Wat_8.12_Data.csv", row.names = 1)
mol = read.csv("Merged_S19S_Wat_8.12_Mol.csv", row.names = 1)
}
# load in transformation data
if(merge.reps){ # if the data isn't merged, there is no transformation profile to load
print("Transformation data has not been generated yet.")
} else{ # loading in transformation data
trans = read.csv("Merged_S19S_Wat_8.12_Trans_Profiles.csv", row.names = 1)
}
# load in carbon use data
carbon.use = read_xlsx("carbon_usage.xlsx")
# load in metadata
metadata = read_xlsx("Table S1 - S19S Metadata.xlsx")
# confirm matching data and mol
if(!identical(row.names(data), row.names(mol))){
stop("The FTICR-MS data objects do not having matching masses.")
}
This section handles the merging of reps - a peak must be present in at least two out of three reps for a sample before it is considered “present” in the sample. Not only does this improve the confidence of the data, it also removes noise.
This section is also dropping the sediment data from the data object.
# merge replicates
if(merge.reps){ # checking to see if the merge flag is active
# list files to see if merging already happened (computational insurance)
file.list = list.files(pattern = "Merged")
if(length(file.list) > 0){
stop("You've already merged the data, so I'm saving you some time (even though you are just me who made a mistake).")
}
# removing sediment samples
data = data[,-grep("Sed_Field", colnames(data))]
# removing peaks present only in sediment
mol = mol[-which(rowSums(data) == 0),]
data = data[-which(rowSums(data) == 0),]
# identify samples
samp = metadata$ID
# create object to store data
mean.data = as.data.frame(matrix(data = NA,
nrow = nrow(data),
ncol = length(samp),
dimnames = list(row.names(data),
samp)))
# loop through samples and merge
for(curr.col in colnames(mean.data)){
# find samples
temp = as.data.frame(data[,grep(paste0(curr.col, "_"), colnames(data))])
# merge replicates, checking to ensure replicates exist...
if(ncol(temp) == 1){
temp[temp > 1] = 0 # If there is only one replicate for a sample,
# I'm setting intensities to zero to so it gets
# filtered
} else {
w = which(apply(temp, 1, function(x) length(which(x > 0))) >= 2) # find peaks with values in two reps
temp[-w,] = 0 # set intensities to 0 if peaks missing in >2 reps
}
# average values together and fill in empty object
mean.data[,curr.col] = apply(temp, 1, mean, na.rm = T)
# clean up
rm(temp, w)
}
# S19S_0001 is missing, dropping it
mean.data = mean.data[,!is.na(mean.data[1,])]
# setting mean.data to data
data = mean.data
rm(mean.data)
# remove peaks dropped during merging
mol = mol[-which(rowSums(data) == 0),]
data = data[-which(rowSums(data) == 0),]
# writing data out
write.csv(data, "Merged_S19S_Wat_8.12_Data.csv", quote = T)
write.csv(mol, "Merged_S19S_Wat_8.12_Mol.csv", quote = T)
}
Summarizes our FTICR-MS data by various boundary sets and some cheminformatic derived variables (e.g., NOSC, AI-Mod, etc.).
### preprocessing
# calculating new boundary sets
mol$bs_johnston = johnston_set(mol)
mol$bs_roebuck = roebuck_set(mol)
# select only assigned formulas
data = data[!is.na(mol$MolForm),]
mol = mol[!is.na(mol$MolForm),]
# calculate lambda
get_comp = get_compositions(mol) # lambda requires specific elcomp formulation
lambda = as.data.frame(get_lambda(get_comp$chemical_compositions)) # calc lambda
# name lambda object
names <- rep("", 62)
names[1:12] <- c("delGcox0","delGd0","delGcat0","delGan0","delGdis0","lambda0",
"delGcox","delGd","delGcat","delGan","delGdis","lambda")
stoich_colnames <- c("donor","h2o","hco3","nh4","hpo4","hs","h","e","acceptor","biom")
stoich_types <- c("stoichD","stoichA","stoichCat","stoichAn","stoichMet")
for (i in 1:length(stoich_types)) {
names[((i-1)*10+13):(i*10+12)] <- array(sapply(stoich_types[i], paste, stoich_colnames, sep="_"))
}
colnames(lambda) <- names
# rearrange lambda
lambda['MolForm'] <- get_comp$formulas
lambda = as.data.frame(lambda[,c("MolForm", "delGcox0", "delGcox", "lambda0", "lambda", "delGd0", "delGd")])
colnames(lambda) = c("MolForm","delGcox0PerCmol","delGcoxPerCmol", "lamO20","lamO2","delGd0","delGd")
lambda = lambda[!duplicated(lambda$MolForm),]
# add lamba to mol
mol = mol %>% left_join(lambda, by = "MolForm")
# cleanup
rm(names, stoich_colnames, stoich_types, get_comp, lambda)
### summarize metrics by average
# create object
metric_summary = data.frame(Sample = colnames(data), AI_Mod = NA, DBE = NA,
NOSC = NA, lambda = NA, HtoC = NA, OtoC = NA,
NtoC = NA, PtoC = NA, NtoP = NA)
# loop through samples and average the metrics
for(i in 1:ncol(data)){
# select formulas from sample
w = which(data[,i] > 0)
# temporary mol
temp = mol[w,]
# average values
metric_summary$AI_Mod[i] = mean(temp$AI_Mod)
metric_summary$DBE[i] = mean(temp$DBE)
metric_summary$NOSC[i] = mean(temp$NOSC)
metric_summary$lambda[i] = mean(temp$lamO2)
metric_summary$HtoC[i] = mean(temp$HtoC_ratio)
metric_summary$OtoC[i] = mean(temp$OtoC_ratio)
metric_summary$NtoC[i] = mean(temp$NtoC_ratio)
metric_summary$PtoC[i] = mean(temp$PtoC_ratio)
metric_summary$NtoP[i] = mean(temp$NtoP_ratio, na.rm = T)
}
# add in molecular formula richness
metric_summary = metric_summary %>%
left_join(data.frame(Sample = colnames(data),
Richness = apply(data, 2, function(x) length(which(x > 0)))),
by = "Sample")
### summarize by Bailey set
# https://www.sciencedirect.com/science/article/pii/S0038071716306447#sec2
# create empty object
bailey_set_summary = data.frame(Class = unique(mol$bs2_class))
# loop through samples, counting classes
for(i in 1:ncol(data)){
# select formulas from sample
w = which(data[,i] > 0)
# count classes of detected formulas
temp = data.frame(table(mol$bs2_class[w]))
# adjust column names
colnames(temp) = c("Class", colnames(data)[i])
# add counts to previously created object
bailey_set_summary = bailey_set_summary %>%
left_join(temp, by = c("Class"))
}
### summarize by Johnston set
# create empty object
johnston_set_summary = data.frame(Class = unique(mol$bs_johnston))
# loop through samples, counting classes
for(i in 1:ncol(data)){
# select formulas from sample
w = which(data[,i] > 0)
# count classes of detected formulas
temp = data.frame(table(mol$bs_johnston[w]))
# adjust column names
colnames(temp) = c("Class", colnames(data)[i])
# add counts to previously created object
johnston_set_summary = johnston_set_summary %>%
left_join(temp, by = c("Class"))
}
### summarize by Roebuck set
# create empty object
roebuck_set_summary = data.frame(Class = unique(mol$bs_roebuck))
# loop through samples, counting classes
for(i in 1:ncol(data)){
# select formulas from sample
w = which(data[,i] > 0)
# count classes of detected formulas
temp = data.frame(table(mol$bs_roebuck[w]))
# adjust column names
colnames(temp) = c("Class", colnames(data)[i])
# add counts to previously created object
roebuck_set_summary = roebuck_set_summary %>%
left_join(temp, by = c("Class"))
}
rm(w,i)
# cleaning up
bailey_set_summary = bailey_set_summary[!is.na(bailey_set_summary$Class),]
johnston_set_summary = johnston_set_summary[!is.na(johnston_set_summary$Class),]
roebuck_set_summary = roebuck_set_summary[!is.na(roebuck_set_summary$Class),]
# writing summaries
write.csv(metric_summary, "Summaries/Merged_S19S_Wat_8.12_Metric-Summary.csv",
quote = F, row.names = F)
write.csv(bailey_set_summary, "Summaries/Merged_S19S_Wat_8.12_Bailey-Summary.csv",
quote = F, row.names = F)
write.csv(johnston_set_summary, "Summaries/Merged_S19S_Wat_8.12_Johnston-Summary.csv",
quote = F, row.names = F)
write.csv(roebuck_set_summary, "Summaries/Merged_S19S_Wat_8.12_Roebuck-Summary.csv",
quote = F, row.names = F)
Performing the correlations between the carbon use traits and the various FTICR-MS derived variables.
### Metric summary relationships
# plot
metric_summary %>% gather(Variable, Value, -Sample) %>%
group_by(Sample) %>%
left_join(carbon.use, by = c("Sample" = "code"),
relationship = "many-to-many") %>%
filter(!is.na(abund)) %>%
mutate(abund = abund*100) %>%
rename(`Relative Expression (%)` = abund) %>%
ggplot(aes(x = `Relative Expression (%)`, y = Value))+
geom_point(aes(color = Guild))+
geom_smooth(method = "lm")+
scale_color_manual(values = color)+
facet_grid(Variable~Guild, scales = "free", labeller = label_wrap_gen(width=15))+
ggtitle("DOM Metric Relationships")+
theme_bw()+
theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'
ggsave("Figures/Guild-Metric_Relationships.pdf", height = 12, width = 12)
## `geom_smooth()` using formula = 'y ~ x'
# stats
unique.metric = colnames(metric_summary)
unique.guild = unique(carbon.use$Guild)
# empty object
metric_stats = NULL
# looping through pairwise comparisons
for(i in 2:length(unique.metric)){
for(j in 1:length(unique.guild)){
# select current comparisons
curr.metric = unique.metric[i]
curr.guild = unique.guild[j]
# create temporary object
temp = metric_summary %>%
gather(Variable, Value, -Sample) %>%
filter(Variable == curr.metric) %>%
rename(code = Sample) %>%
left_join(carbon.use[which(carbon.use$Guild %in% curr.guild),], by = "code") %>%
filter(!is.na(Guild)) %>%
mutate(abund = abund*100)
# run spearman correlation
temp = rcorr(temp$Value, temp$abund)
# build output
temp = data.frame(Metric = curr.metric, Guild = curr.guild,
rho = temp$r[1,2], p_value = temp$P[1,2])
# add to correlation object
metric_stats = rbind(metric_stats, temp)
}
}
rm(temp, i, j, curr.metric, curr.guild, unique.guild, unique.metric)
# adjust p-value
metric_stats$padj = p.adjust(metric_stats$p_value, method = "fdr")
### Bailey relationships
# plot
summary_plot(bailey_set_summary, "Bailey Boundary Set Relationships")
## `geom_smooth()` using formula = 'y ~ x'
ggsave("Figures/Guild-Bailey_Relationships.pdf", height = 12, width = 12)
## `geom_smooth()` using formula = 'y ~ x'
# real stats
bailey_stats = summary_stat(bailey_set_summary)
bailey_stats$padj = p.adjust(bailey_stats$p_value, method = "fdr")
### Johnston relationships
# plot
summary_plot(johnston_set_summary, "Johnston Boundary Set Relationships")
## `geom_smooth()` using formula = 'y ~ x'
ggsave("Figures/Guild-Johnston_Relationships.pdf", height = 12, width = 12)
## `geom_smooth()` using formula = 'y ~ x'
# real stats
johnston_stats = summary_stat(johnston_set_summary)
johnston_stats$padj = p.adjust(johnston_stats$p_value, method = "fdr")
### Roebuck relationships
# plot
summary_plot(roebuck_set_summary, "Roebuck Boundary Set Relationships")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 348 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 348 rows containing missing values (`geom_point()`).
ggsave("Figures/Guild-Roebuck_Relationships.pdf", height = 12, width = 12)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 348 rows containing non-finite values (`stat_smooth()`).
## Removed 348 rows containing missing values (`geom_point()`).
# real stats
roebuck_stats = summary_stat(roebuck_set_summary)
roebuck_stats$padj = p.adjust(roebuck_stats$p_value, method = "fdr")
# write stats
write.csv(metric_stats, "Correlations/Guild-Metric_Relationships.csv",
quote = T, row.names = F)
write.csv(bailey_stats, "Correlations/Guild-Bailey_Relationships.csv",
quote = T, row.names = F)
write.csv(johnston_stats, "Correlations/Guild-Johnston_Relationships.csv",
quote = T, row.names = F)
write.csv(roebuck_stats, "Correlations/Guild-Roebuck_Relationships.csv",
quote = T, row.names = F)
# reorganizing the transformation object
trans = trans[,-1] %>% rownames_to_column("Transformation")
colnames(trans) = gsub("Sample_", "", colnames(trans))
# select methyl guild
methyl_guild = carbon.use[which(carbon.use$Guild %in% "methyl_c1"),]
# empty object
methyl_relation = NULL
# looping through pairwise comparisons
for(i in 1:nrow(trans)){
# select current comparisons
curr.trans = trans$Transformation[i]
# create temporary object
temp = trans %>%
gather(code, Count, -Transformation) %>%
group_by(code) %>% mutate(Rel_Count = (Count/sum(Count)*100)) %>%
filter(Transformation == curr.trans) %>%
left_join(methyl_guild, by = "code") %>%
filter(!is.na(abund)) %>%
mutate(abund = abund*100)
# run spearman correlation
temp = rcorr(temp$Rel_Count, temp$abund)
# build output
temp = data.frame(Transformation = curr.trans,
rho = temp$r[1,2], p_value = temp$P[1,2])
# add to correlation object
methyl_relation = rbind(methyl_relation, temp)
}
# adjust p-value
methyl_relation$padj = p.adjust(methyl_relation$p_value, method = "fdr")
# plotting methyl-C1H4 relationship for direct inspection
trans %>% gather(Sample, Count, -Transformation) %>%
group_by(Sample) %>%
mutate(`Relative Count (%)` = (Count/sum(Count))*100) %>%
filter(Transformation == "C1H4") %>% ungroup() %>%
gather(Variable, Value, -Transformation, -Sample) %>%
left_join(methyl_guild, by = c("Sample" = "code")) %>%
filter(!is.na(abund)) %>% mutate(abund = abund*100) %>%
ggplot(aes(x = Value, y = abund))+
geom_point(color = color["methyl_c1"])+
geom_smooth(method = "lm")+
facet_grid(.~Variable, scales = "free_x")+
xlab("Transformation Value")+
ylab("Relative Expression (%)")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggsave("Figures/Methyl-C1H4_Relationship.pdf")
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
# write stats
write.csv(methyl_relation, "Correlations/Methyl-Transformations_Relationships.csv")